This RMD describes the analysis of the 23 feature NMR dataset from Jingwei (Andrew Patterson) comparing lean/obese Caucasian and Chinese individuals with n=5 stool samples analysed for each of the four groups.
These 23 features were the most representative fecal metabolites with a good signal.
library(readxl)
library(ggplot2)
library(dplyr)
library(tidyr)
library(tibble)
library(MicrobeR)
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] MicrobeR_0.3.2 tibble_3.0.1 tidyr_1.0.2 dplyr_0.8.5 ggplot2_3.3.0
## [6] readxl_1.3.1
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-144 ggtree_2.0.4 phyloseq_1.30.0
## [4] bit64_0.9-7 httr_1.4.1 tools_3.6.3
## [7] R6_2.4.1 vegan_2.5-6 DT_0.13
## [10] rtk_0.2.5.8 DBI_1.1.0 lazyeval_0.2.2
## [13] BiocGenerics_0.32.0 mgcv_1.8-31 colorspace_1.4-1
## [16] permute_0.9-5 ade4_1.7-15 NADA_1.6-1.1
## [19] withr_2.2.0 philr_1.12.0 tidyselect_1.0.0
## [22] phangorn_2.5.5 bit_1.1-15.2 compiler_3.6.3
## [25] Biobase_2.46.0 plotly_4.9.2.1 scales_1.1.0
## [28] quadprog_1.5-8 stringr_1.4.0 digest_0.6.25
## [31] rmarkdown_2.1 XVector_0.26.0 pkgconfig_2.0.3
## [34] htmltools_0.5.0 htmlwidgets_1.5.1 rlang_0.4.5
## [37] RSQLite_2.2.0 zCompositions_1.3.4 jsonlite_1.6.1
## [40] magrittr_1.5 biomformat_1.14.0 Matrix_1.2-18
## [43] Rcpp_1.0.4 munsell_0.5.0 S4Vectors_0.24.4
## [46] Rhdf5lib_1.8.0 DECIPHER_2.14.0 ape_5.3
## [49] lifecycle_0.2.0 stringi_1.4.6 yaml_2.2.1
## [52] MASS_7.3-51.5 zlibbioc_1.32.0 rhdf5_2.30.1
## [55] plyr_1.8.6 grid_3.6.3 blob_1.2.1
## [58] parallel_3.6.3 crayon_1.3.4 lattice_0.20-38
## [61] Biostrings_2.54.0 splines_3.6.3 multtest_2.42.0
## [64] knitr_1.28 pillar_1.4.3 igraph_1.2.5
## [67] reshape2_1.4.4 codetools_0.2-16 stats4_3.6.3
## [70] fastmatch_1.1-0 picante_1.8.1 glue_1.4.0
## [73] evaluate_0.14 data.table_1.12.8 BiocManager_1.30.10
## [76] vctrs_0.2.4 treeio_1.10.0 foreach_1.5.0
## [79] cellranger_1.1.0 gtable_0.3.0 purrr_0.3.4
## [82] assertthat_0.2.1 xfun_0.13 tidytree_0.3.3
## [85] viridisLite_0.3.0 survival_3.1-8 truncnorm_1.0-8
## [88] iterators_1.0.12 rvcheck_0.1.8 memoise_1.1.0
## [91] IRanges_2.20.2 cluster_2.1.0 ellipsis_0.3.0
theme_pub<- function () {
theme_bw(base_size=10, base_family="Helvetica") +
theme(axis.text = element_text(size=8, color = "black"),
axis.title = element_text(size=10, color="black"),
legend.text = element_text(size=8, color = "black"),
legend.title = element_text(size=10, color = "black"),
panel.border = element_rect(color="black", fill=NA))
}
nmr<-read_excel("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript_Rd2/data/metabo_human_nmr_selected.xlsx")
# nmr dataset to long form
nmr_long <- nmr %>% pivot_longer(
cols=Acetate:Xanthine,
names_to="Metabolite",
values_to="Value")
# scale values
nmr_long <- nmr_long %>%
group_by(Metabolite) %>%
mutate(Zscore = Value %>% scale (center = TRUE, scale = TRUE)) # calculate z-score
# distribution of values
ggplot(nmr_long, aes(x=Zscore)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# test significance
results<- nmr_long %>%
group_by(Metabolite) %>%
do(
broom::glance(t.test(Zscore~Ethnicity, data=., paired=F))
) %>%
ungroup() %>%
select(Metabolite, p.value) %>%
mutate(p.adj = p.adjust(p.value, method="fdr")) %>%
mutate(Significant=ifelse(p.value<0.05, "*",""))
Nice.Table(results)
# define row order (metabolites)
nmr_wide <- nmr_long %>%
pivot_wider(id_cols = "Metabolite", names_from="SAMPLE_ID", values_from="Zscore") %>%
column_to_rownames("Metabolite")
roworder <- hclust(dist(nmr_wide), method = "average")
roworder <- roworder$labels[roworder$order]
# define column order (subject)
nmr_wide <- nmr_long %>%
pivot_wider(id_cols = "SAMPLE_ID", names_from="Metabolite", values_from="Zscore") %>%
column_to_rownames("SAMPLE_ID")
colorder <- hclust(dist(nmr_wide), method = "average")
colorder <- colorder$labels[colorder$order]
# heatmap
nmr_long %>%
mutate(Group=factor(Group, levels=c("Lean CA","Obese CA","Lean CH","Obese CH"))) %>%
ggplot(aes(y=factor(Metabolite, levels=roworder), x=factor(`SAMPLE_ID`, levels=colorder), fill = Zscore)) +
geom_tile() +
scale_fill_gradient2(low="cornflowerblue", high="indianred") +
#scale_fill_distiller(type ="seq", palette = "RdBu", na.value = "gray90") +
coord_cartesian(expand=F) +
ylab("") +
xlab("") +
facet_grid(~Ethnicity, margins=F, drop=T, scales="free", space="free") +
theme_bw() +
theme(axis.text = element_text(size = 8))
#ggsave("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript/figures/metab_human_heatmap_repfeats.pdf", height=4, width=5, useDingbats=F)
nmr_long <- nmr_long %>%
filter(Metabolite %in% (results %>% filter(p.value<0.05))$Metabolite)
# define row order (metabolites)
nmr_wide <- nmr_long %>%
pivot_wider(id_cols = "Metabolite", names_from="SAMPLE_ID", values_from="Zscore") %>%
column_to_rownames("Metabolite")
roworder <- hclust(dist(nmr_wide), method = "average")
roworder <- roworder$labels[roworder$order]
# define column order (subject)
nmr_wide <- nmr_long %>%
pivot_wider(id_cols = "SAMPLE_ID", names_from="Metabolite", values_from="Zscore") %>%
column_to_rownames("SAMPLE_ID")
colorder <- hclust(dist(nmr_wide), method = "average")
colorder <- colorder$labels[colorder$order]
# heatmap
nmr_long %>%
mutate(Group=factor(Group, levels=c("Lean CA","Obese CA","Lean CH","Obese CH"))) %>%
ggplot(aes(y=factor(Metabolite, levels=roworder), x=factor(`SAMPLE_ID`, levels=colorder), fill = Zscore)) +
geom_tile() +
scale_fill_gradient2(low="cornflowerblue", high="indianred") +
#scale_fill_distiller(type ="seq", palette = "RdBu", na.value = "gray90") +
theme(axis.text = element_text(size = 8), axis.ticks = element_blank()) +
theme_pub() +
coord_cartesian(expand=F) +
ylab("Metabolite") +
xlab("Subject") +
labs(fill="Z-score") +
facet_grid(~Ethnicity, margins=F, drop=T, scales="free", space="free")
#ggsave("/Volumes/turnbaughlab/qb3share/qiyanang/IDEO_manuscript/figures/human_metabolomics/metab_human_heatmap_sigfeats.pdf", height=2.2, width=5, useDingbats=F)